Goal of this analysis is to explore the sensitivity of NEFSC size spectra slopes to adjustments in minimum/maximum bodymass cutoffs.
Additional exploration will be done to examine catch composition through:
Percent of total biomass sourced from different classes of fishes
Cumulative distribution figures, examining the % of total biomass within discrete weight bins
Detailed code for the data builds and summary functions can be found here.
One way to address differences in catchability of different sized fishes is by setting a minimum biomass threshold for the size spectra analysis. Ideally the minimum biomass threshold is set so that you retain all sizes that are reliably caught by the sampling gear. You also would like to see the size-spectrum equations to not shift dramatically by small adjustments in this cutoff.
For a starting point in the sensitivity analysis I will use all the data and calculate slopes for all the data across an evenly spaced range of cutoff values similar to a grid search.
#range of size cutoffs
size_cutoffs <- c(seq(0, 50, by = 2.5))
# Calculate the slope/intercepts
size_cutoff_results <- map(size_cutoffs, function(cutoff_size) {
filtered_data <- dataBin %>%
filter(wmin >= cutoff_size)
grouped_data <- filtered_data %>%
mutate(group_level = "all_data") %>%
split(.$group_level)
# get SS results
group_results <- grouped_data %>%
imap_dfr(group_mle_calc) %>%
mutate(stdErr = (abs(confMin - b) + abs(confMax - b)) / (2 * 1.96),
Year = "all",
season = "all",
area = "all",
C = (b != -1 ) * (b + 1) / ( xmax^(b + 1) - xmin^(b + 1) ) + (b == -1) * 1 / ( log(xmax) - log(xmin)))
return(group_results)
}) %>%
setNames(str_c(size_cutoffs, " grams")) %>%
bind_rows(.id = "cutoff_g")
# Format the Slope results so the cutoff is numeric
size_cutoff_results <- size_cutoff_results %>%
mutate(cutoff = str_sub(cutoff_g, 1, -7),
cutoff = as.numeric(cutoff))
Slope seems to decline in an nearly linear way as minimum bodysize cutoff increases. Thi linear relationship makes sense because you are removing the biomass of smaller fishes, meaning the remaining larger fishers take up a larger proportion of the total biomass. This makes me wonder whether the actual slope value is important, or whether the relative changes in slope may be more important. It would seem like as long as the cutoff is consistent between groups that they could be compared against one-another for inference.
In the Edwards vignettes a cutoff of 5g is used, which is referenced to a paper by Blanchard et al., 2005.
size_cutoff_results %>%
ggplot(aes(x = cutoff, y = b)) +
geom_point() +
geom_smooth(formula = y ~ x, method = "lm", se = TRUE, size = 1) +
stat_poly_eq(formula = y ~ x, parse = TRUE,
label.y = "top", label.x = "right") +
labs(x = "Minimum Bodymass Cutoff (g)", y = "Size Spectrum Slope (b)")
So C is the other parameter in the power law distribution… Check back to see what it refers to.
size_cutoff_results %>%
ggplot(aes(x = cutoff, y = C)) +
geom_point() +
geom_smooth(formula = y ~ x, method = "lm", se = FALSE, size = 1) +
stat_poly_eq(formula = y ~ x, parse = TRUE,
label.y = "top", label.x = "left") +
labs(x = "Minimum Bodymass Cutoff (g)", y = "Size Spectrum Intercept? (C)")
Independently of the slope and intercept for each group there may be value in seeing where across the spectrum of individual biomasses the majority of bodymass resides.
Not sure if cumulative density plots of stacked barplots are more helpful so I will explore both.
# cumulative distribution data prep
# Annual Totals
totals <- dataBin %>%
group_by(Year) %>%
summarise(`Total Annual Biomass` = sum(Biomass)) %>%
ungroup()
# Size Bin Data Prep
bin_totals <- dataBin %>%
group_by(Year, `Bodymass Bin (g)`) %>%
summarise(`Total Bin Biomass` = sum(Biomass)) %>%
ungroup() %>%
left_join(totals, by = "Year") %>%
mutate(`Percent of Annual Biomass` = (`Total Bin Biomass` / `Total Annual Biomass`) * 100 )
# Species Class Bins Data Prep
class_totals <- dataBin %>%
group_by(Year, spec_class) %>%
summarise(`Total Class Biomass` = sum(Biomass)) %>%
ungroup() %>%
left_join(totals, by = "Year") %>%
mutate(`Percent of Annual Biomass` = (`Total Class Biomass` / `Total Annual Biomass`) * 100 ,
`Species Category` = spec_class)
########## Cumulative Sum Plots Prep ##################
# 1. Data Prep for cumulative sums on size bins
size_cumsums <- bin_totals %>%
arrange(Year, `Bodymass Bin (g)`) %>%
group_by(Year) %>%
mutate(`Cumulative Biomass (kg)` = cumsum(`Total Bin Biomass`) / 1000,
`Proportion of Annual Biomass` = `Cumulative Biomass (kg)` / (`Total Annual Biomass` / 1000),
`Weight Bin` = as.numeric(factor(`Bodymass Bin (g)`)),
`Weight Bin` = as.factor(`Weight Bin`))
# 2. Cumulative sums on species class
class_cumsums <- class_totals %>%
arrange(Year, `Species Category`) %>%
group_by(Year) %>%
mutate(`Cumulative Biomass (kg)` = cumsum(`Total Class Biomass`) / 1000,
`Proportion of Annual Biomass` = `Cumulative Biomass (kg)` / (`Total Annual Biomass` / 1000),
`Species Class` = as.numeric(factor(`Species Category`)),
`Species class` = as.factor(`Species Class`))
# 3. Cumulative Sums on lengths
lmin_cumsums <- dataBin %>%
group_by(Year, LngtMin) %>%
summarise(`Total Bin Biomass` = sum(Biomass)) %>%
ungroup() %>%
left_join(totals, by = "Year") %>%
mutate(`Percent of Annual Biomass (g)` = (`Total Bin Biomass` / `Total Annual Biomass`) * 100 ) %>%
arrange(Year, LngtMin) %>%
group_by(Year) %>%
mutate(`Cumulative Biomass (kg)` = cumsum(`Total Bin Biomass`) / 1000,
`Proportion of Annual Biomass` = `Cumulative Biomass (kg)` / (`Total Annual Biomass` / 1000),
`Length (cm)` = as.numeric(factor(LngtMin)))
# Size Bins merging with totals - This one could be better as cumulative distirbution plot (s)
bin_totals %>%
ggplot(aes(x = fct_rev(Year), y = `Percent of Annual Biomass`, fill = `Bodymass Bin (g)`)) +
geom_col(color = "white", size = 0.2) +
labs(x = "", y = "Proportion of Total Annual Biomass") +
coord_flip() +
scale_fill_gmri() +
scale_y_continuous(position = "right") +
guides("fill" = guide_legend(title.position = "top", title.hjust = 0.5)) +
theme(legend.position = "bottom")
# Class Proportions
class_totals %>%
ggplot(aes(x = fct_rev(Year), y = `Percent of Annual Biomass`, fill = `Species Category`)) +
geom_col(color = "white", size = 0.2) +
labs(x = "", y = "Proportion of Total Annual Biomass") +
coord_flip() +
scale_fill_gmri() +
scale_y_continuous(position = "right", ) +
guides("fill" = guide_legend(title.position = "top", title.hjust = 0.5)) +
theme(legend.position = "bottom")
# Plot the cumulative plots by bodymass bins, by decade
decade_plots_size <- size_cumsums %>%
mutate(decade = floor_decade(Year)) %>%
split(.$decade) %>%
map(function(decade_group){
# Drop levels
levels_dropped <- decade_group %>%
mutate(Year = forcats:::fct_drop(Year))
totals_dropped <- filter(totals, Year %in% levels_dropped$Year) %>%
mutate(Year = forcats:::fct_drop(Year))
# Cumulative Biomass
biomass_plot <- levels_dropped %>%
ggplot(aes(x = `Weight Bin`, y = `Total Bin Biomass` / 1000)) +
geom_col(aes(fill = `Bodymass Bin (g)`)) +
geom_hline(data = totals_dropped, aes(yintercept = (`Total Annual Biomass` / 1000) / 2),
linetype = 2, alpha = 0.5, color = "royalblue") +
facet_wrap(~Year, ncol = 5) +
scale_y_continuous(labels = scales::comma_format()) +
scale_fill_gmri() +
theme(legend.position = "none") +
labs(y = "Total Bin Biomass (kg)")
# Proportion of Total Biomass
proportion_plot <- levels_dropped %>%
ggplot(aes(x = `Weight Bin`, y = `Percent of Annual Biomass`)) +
geom_col(aes(fill = `Bodymass Bin (g)`)) +
geom_hline(yintercept = 50, linetype = 2, alpha = 0.5, color = "royalblue") +
facet_wrap(~Year, ncol = 5) +
scale_fill_gmri() +
guides("color" = guide_legend(title.position = "top", title.hjust = 0.5)) +
theme(legend.position = "top")
stacked_plot <- biomass_plot / proportion_plot
return(list("mass" = biomass_plot,
"proportion" = proportion_plot))
})
# And plot
decade_plots_size$`1980`$mass
decade_plots_size$`1980`$proportion
decade_plots_size$`1990`$mass
decade_plots_size$`1990`$proportion
decade_plots_size$`2000`$mass
decade_plots_size$`2000`$proportion
decade_plots_size$`2010`$mass
decade_plots_size$`2010`$proportion
#### Column Plots for the different classes
# And do the same for the species classes
decade_plots_class <- class_cumsums %>%
mutate(decade = floor_decade(Year)) %>%
split(.$decade) %>%
map(function(decade_group){
# Drop levels
levels_dropped <- decade_group %>%
mutate(Year = forcats:::fct_drop(Year))
totals_dropped <- filter(totals, Year %in% levels_dropped$Year) %>%
mutate(Year = forcats:::fct_drop(Year))
# Cumulative Biomass
biomass_plot <- levels_dropped %>%
ggplot(aes(x = `Species Class`, y = `Total Class Biomass` / 1000)) +
geom_col(aes(fill = `Species Category`)) +
geom_hline(data = totals_dropped, aes(yintercept = (`Total Annual Biomass` / 1000) / 2),
linetype = 2, alpha = 0.5, color = "royalblue") +
facet_wrap(~Year) +
scale_fill_gmri() +
scale_y_continuous(labels = scales::comma_format()) +
theme(legend.position = "none") +
labs(y = "Total Class Biomass (kg)")
# Proportion of Total Biomass
proportion_plot <- levels_dropped %>%
ggplot(aes(x = `Species Class`, y = `Percent of Annual Biomass`)) +
geom_col(aes(fill = `Species Category`)) +
geom_hline(yintercept = 50, linetype = 2, alpha = 0.5, color = "royalblue") +
facet_wrap(~Year) +
scale_fill_gmri() +
guides("color" = guide_legend(title.position = "top", title.hjust = 0.5)) +
theme(legend.position = "top")
stacked_plot <- biomass_plot / proportion_plot
return(list("mass" = biomass_plot,
"proportion" = proportion_plot))
})
# And plot
decade_plots_class$`1980`$mass
decade_plots_class$`1980`$proportion
decade_plots_class$`1990`$mass
decade_plots_class$`1990`$proportion
decade_plots_class$`2000`$mass
decade_plots_class$`2000`$proportion
decade_plots_class$`2010`$mass
decade_plots_class$`2010`$proportion
#### Plotting CDP of lmin instead of mass bins ####
decade_plots_length <- lmin_cumsums %>%
mutate(decade = floor_decade(Year)) %>%
split(.$decade) %>%
map(function(decade_group){
# Drop levels
levels_dropped <- decade_group %>%
mutate(Year = forcats:::fct_drop(Year))
totals_dropped <- filter(totals, Year %in% levels_dropped$Year) %>%
mutate(Year = forcats:::fct_drop(Year))
# Cumulative Biomass
biomass_plot <- levels_dropped %>%
ggplot(aes(x = `Length (cm)`, y = `Cumulative Biomass (kg)`)) +
geom_step(group = 1, size = 1) +
geom_hline(data = totals_dropped, aes(yintercept = (`Total Annual Biomass` / 1000) / 2),
linetype = 2, alpha = 0.5, color = "royalblue") +
scale_y_continuous(labels = scales::comma_format()) +
facet_wrap(~Year, ncol = 5) +
theme(legend.position = "none")
# Proportion of Total Biomass
proportion_plot <- levels_dropped %>%
ggplot(aes(x = `Length (cm)`, y = `Proportion of Annual Biomass`)) +
geom_step(group = 1, size = 1) +
geom_hline(yintercept = 0.5, linetype = 2, alpha = 0.5, color = "royalblue") +
facet_wrap(~Year, ncol = 5)
stacked_plot <- biomass_plot / proportion_plot
return(list("mass" = biomass_plot,
"proportion" = proportion_plot))
})
# And plot
decade_plots_length$`1980`$mass
decade_plots_length$`1980`$proportion
decade_plots_length$`1990`$mass
decade_plots_length$`1990`$proportion
decade_plots_length$`2000`$mass
decade_plots_length$`2000`$proportion
decade_plots_length$`2010`$mass
decade_plots_length$`2010`$proportion
For these we want to create polygons using the stratum for each region, that way there is an identical area used for calculation of regional anomalies.
OISST Timeseries for each region are then calculated in python, and available to access via box
# Want to match up this df
results_5g <- min5g %>%
filter(Year != "all") %>%
mutate(
Year = as.numeric(Year),
region = case_when(
area == "all" ~ "All Regions",
area == "GB" ~ "Georges Bank",
area == "GoM" ~ "Gulf of Maine",
area == "MAB" ~ "Mid-Atlantic Bight",
area == "SNE" ~ "Southern New England"
)
)
# More formatting
regional_temps <- region_temps %>%
mutate(Year = lubridate::year(time),
est_month = lubridate::month(time),
season = case_when(
est_month %in% c(3:5) ~ "SPRING",
est_month %in% c(6:8) ~ "SUMMER",
est_month %in% c(9:11) ~ "FALL",
est_month %in% c(1,2,12) ~ "Winter" ))
# To these timelines
region_temps %>%
ggplot(aes(time, sst)) +
geom_line(color = "gray50") +
facet_wrap(~region, ncol = 2) +
labs(x = "", y = "Sea Surface Temperature")
# So some averaging to get spring/fall and annual temp and anoms
# and also some lagging action
# Spring primarily in March, April or May,
# Fall primarily in Sept, Oct, Nov
# formatting to combine with trawl data
overall_temps <- regional_temps %>%
mutate(region = "All Regions") %>%
group_by(Year, region, season) %>%
summarise(avg_sst = mean(sst, na.rm = T),
avg_sst_clim = mean(sst_clim, na.rm = T),
avg_sst_anom = mean(sst_anom, na.rm = T))
seasonal_temps <- regional_temps %>%
group_by(Year, region, season) %>%
summarise(
avg_sst = mean(sst, na.rm = T),
avg_sst_clim = mean(sst_clim, na.rm = T),
avg_sst_anom = mean(sst_anom, na.rm = T))
seasonal_temps <- bind_rows(overall_temps, seasonal_temps) %>% arrange(Year, region, season)
# Merge them
results_w_temps <- left_join(results_5g, seasonal_temps, by = c("Year", "region", "season")) %>%
drop_na(avg_sst)
# And plot
results_w_temps %>%
mutate(season = str_to_lower(season),
season = factor(season, levels = c("spring", "fall"))) %>%
ggplot(aes(avg_sst_anom, b)) +
geom_point() +
geom_smooth(formula = y ~ x,
size = 1,
method = "lm") +
stat_poly_eq(formula = y ~ x, parse = TRUE,
label.y = "bottom", label.x = "right") +
facet_grid(season ~ region) +
labs(x = "Seasonal Temperature Anomaly", y = "Size Spectrum Slope (b)")
These are the results from using a 5 gram bodysize cutoff.
# # Plotting all 5gram cutoff results
min5g <- min5g %>%
mutate(Year = as.numeric(as.character(Year)),
season = case_when(season == "all" ~ "Spring + Fall",
season == "SPRING" ~ "Spring",
season == "FALL" ~ "Fall"),
season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))
min5g %>%
ggplot(aes(Year, b, color = area, shape = season)) +
geom_pointrange(aes(x = Year, y = b, ymin = confMin, ymax = confMax), alpha = 0.5) +
geom_hline(data = filter(min5g, is.na(Year)),
aes(yintercept = b), linetype = 2, alpha = 0.7, size = 1) +
facet_grid(season ~ area) +
geom_smooth(method = "gam", show.legend = FALSE, se = FALSE,
formula = y ~ s(x, bs = "cs")) +
theme_bw() +theme(axis.text.x = element_text(angle = 90, size= 6, vjust = 0.5)) +
scale_color_gmri() +
labs() +
guides(shape = "none",
color = "none") +
labs(x = "",
caption = "Horizontal line represents the mean across all years for facet group.",
y = "Size Spectrum Slope (b)")